DEG<-read.csv("Data/DEA_results.csv")
names(DEG)[names(DEG) == "X"] <-"transcript_id"
# head(DEG)
# dim(DEG)
Raw_Annots<-read.csv("Data/Annotated_Transcriptome_summarisedbyTranscript_V5.tsv", sep="\t")## File toobig for GitHub. Contact me if you need it.
#dim(Raw_Annots)
#Raw_Annots
DEG_Annots<-DEG %>%
left_join(., Raw_Annots, by=("transcript_id" ) )
#DEG_Annots
Table_vst<-read.csv("Data/Table_vst.csv", row.names = 1)
Design_matrix<-data.frame(Sample_names=colnames(Table_vst), Biological_condition=sapply(strsplit(colnames(Table_vst),"_rep"), `[`, 1))
rownames(Design_matrix)<-Design_matrix$Sample_names
# Do PCA
PCA <- prcomp(t(as.data.frame(Table_vst)))
# Extract PC axes for plotting
PCAvalues <- data.frame(samples=sapply(strsplit(colnames(Table_vst),"_rep"), `[`, 1), PCA$x)
#summary(PCA)
# Plot
ggplot(PCAvalues, aes(x = PC1, y = PC2, colour = samples)) +
geom_point(size = 3)+
xlab(paste0("PC1: ",round(summary(PCA)$importance[2,"PC1"]*100,2),"% variance")) +
ylab(paste0("PC2: ",round(summary(PCA)$importance[2,"PC2"]*100,2),"% variance")) +
geom_text_repel(label = rownames(PCAvalues), size=3)+
ggtitle("PCA - All genes - VST")
List_DEA_genes<-DEG[which(DEG$padj<0.01),"transcript_id"]
#length(List_DEA_genes)
DEA_genes_vst<-Table_vst[which(rownames(Table_vst) %in% List_DEA_genes),]
#dim(DEA_genes_vst)
# Do PCA
PCA_DEgenes <- prcomp(t(as.data.frame(DEA_genes_vst)))
# Extract PC axes for plotting
#PCAvalues_DEgenes <- data.frame(samples=sapply(strsplit(colnames(DEA_genes_vst),"_rep"), `[`, 1), PCA$x)
PCAvalues_DEgenes <- data.frame(samples=sapply(strsplit(colnames(DEA_genes_vst),"_rep"), `[`, 1), PCA_DEgenes$x)
#summary(PCAvalues_DEgenes)
# Plot
ggplot(PCAvalues_DEgenes, aes(x = PC1, y = PC2, colour = samples)) +
geom_point(size = 3)+
xlab(paste0("PC1: ",round(summary(PCA_DEgenes)$importance[2,"PC1"]*100,2),"% variance")) +
ylab(paste0("PC2: ",round(summary(PCA_DEgenes)$importance[2,"PC2"]*100,2),"% variance")) +
geom_text_repel(label = rownames(PCAvalues_DEgenes), size=3)+
ggtitle("PCA - All genes - VST")
library(AnnotationForge)
Gene_Table<-data.frame(GID=DEG_Annots$transcript_id,
BlastX=DEG_Annots$sprot_Top_BLASTX_hit,
BlastP=DEG_Annots$sprot_Top_BLASTP_hit,
BlastX_Name=DEG_Annots$BlastX_ProtName,
BlastP_Name=DEG_Annots$BlastP_ProtName,
BlastSppHit=DEG_Annots$SPECIES)
GoTable<-DEG_Annots %>%
dplyr::select("transcript_id","PfamBlastGos_UNIQ_clean") %>%
separate_rows(. ,"PfamBlastGos_UNIQ_clean", sep=";") %>%
filter(!is.na(PfamBlastGos_UNIQ_clean)) %>%
filter(!str_detect(PfamBlastGos_UNIQ_clean, '\\.')) %>%
dplyr::rename(GID="transcript_id" , GO="PfamBlastGos_UNIQ_clean") %>%
tibble::add_column(EVIDENCE="IEA") %>%
as.data.frame()
## Too big for GitHub, contact me if you need the package.
# makeOrgPackage(gene_info=Gene_Table, go=GoTable,
# version = "0.1",
# maintainer="Some One <so@someplace.org>",
# author="Some One <so@someplace.org>",
# outputDir = "/home/ysland/Documents/Paracletus_project/RNA_seq_data/",
# tax_id = "223034.",
# genus = "Paracletus",
# species = "cimiciformis",
# goTable="go")
#
#
# ## then you can call install.packages based on the return value
library("org.Pcimiciformis.eg.db")
table(DEG[which(DEG$padj<0.01),]$log2FoldChange>0)
##
## FALSE TRUE
## 576 511
library(org.Pcimiciformis.eg.db)
GENESList<-list(
DEA_genes_Up_FM=DEG[which(DEG$padj<0.01 & DEG$log2FoldChange>0),"transcript_id"],
DEA_genes_Up_RM=DEG[which(DEG$padj<0.01 & DEG$log2FoldChange<0),"transcript_id"]
)
table(GENESList$DEA_genes_Up_FM %in% GoTable$GID)
table(GENESList$DEA_genes_Up_RM %in% GoTable$GID)
DE_byMorf_BP<- compareCluster(geneCluster = GENESList,
OrgDb= org.Pcimiciformis.eg.db,
keyType = 'GID',
fun = "enrichGO",
ont = "BP",
pvalueCutoff = 0.05,
pAdjustMethod= "BH",
qvalueCutoff = 0.05,
readable=FALSE)
DE_byMorf_MF<- compareCluster(geneCluster = GENESList,
OrgDb= org.Pcimiciformis.eg.db,
keyType = 'GID',
fun = "enrichGO",
ont = "MF",
pvalueCutoff = 0.05,
pAdjustMethod= "BH",
qvalueCutoff = 0.05,
readable=FALSE)
DE_byMorf_CC<- compareCluster(geneCluster = GENESList,
OrgDb= org.Pcimiciformis.eg.db,
keyType = 'GID',
fun = "enrichGO",
ont = "CC",
pvalueCutoff = 0.05,
pAdjustMethod= "BH",
qvalueCutoff = 0.05,
readable=FALSE)
#annotationDbi::select(org.Pcimiciformis.eg.db,keys=DE_genes_Groups_long$Gene,columns=c("ENTREZID", "SYMBOL","GENENAME","FLYBASECG" ),keytype="FLYBASE")
# write.csv(DE_byMorf_BP, file = "DE_p001_byMorf_BP.csv")
# write.csv(DE_byMorf_MF, file = "DE_p001_byMorf_MF.csv")
# write.csv(DE_byMorf_CC, file = "DE_p001_byMorf_CC.csv")
dotplot(DE_byMorf_BP , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01) -- BP ")
emapplot(DE_byMorf_BP,layout="nicely")+ggtitle("Significant GO terms enrich in DE genes (p<0.01) -- BP ")
DE_byMorf_BP_Simplified_level<-gofilter(DE_byMorf_BP, level=6)
dotplot(DE_byMorf_BP_Simplified_level , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01) -- BP -- Level 6")
GO0005991<-DE_byMorf_BP_Simplified_level@compareClusterResult[which(DE_byMorf_BP_Simplified_level@compareClusterResult$Description=="trehalose metabolic process"),]
GO0005991
GO0005991_list<-list(
Up_FM=unlist(strsplit(GO0005991[which(GO0005991$Cluster=="DEA_genes_Up_FM"),"geneID"], "/")),
Up_RM=unlist(strsplit(GO0005991[which(GO0005991$Cluster=="DEA_genes_Up_RM"),"geneID"], "/"))
)
VennDiagram::venn.diagram(GO0005991_list,
category.names =names(GO0005991_list),
output=TRUE,
filename = "Venn_GO0005991.png" ,
imagetype="png",
# Set names
cat.cex = 0.6,
fill = viridis(2))
## [1] 1
GO0005991_list
## $Up_FM
## [1] "TRINITY_DN73_c0_g1_i5" "TRINITY_DN73_c0_g1_i12" "TRINITY_DN73_c0_g1_i1"
## [4] "TRINITY_DN73_c0_g1_i13" "TRINITY_DN73_c0_g1_i18"
##
## $Up_RM
## [1] "TRINITY_DN73_c0_g2_i1" "TRINITY_DN7791_c0_g1_i3"
## [3] "TRINITY_DN64519_c0_g1_i1" "TRINITY_DN99_c1_g1_i8"
## [5] "TRINITY_DN7791_c0_g1_i13" "TRINITY_DN73_c0_g1_i20"
## [7] "TRINITY_DN1794_c3_g5_i1" "TRINITY_DN99_c1_g1_i15"
## [9] "TRINITY_DN99_c1_g1_i6" "TRINITY_DN1794_c3_g1_i1"
## [11] "TRINITY_DN7791_c0_g1_i12" "TRINITY_DN73_c0_g2_i2"
venn
#The simplify method apply ‘select_fun’ (which can be a user defined function) to feature ‘by’ to select one representative terms from redundant terms (which have similarity higher than ‘cutoff').
DE_byMorf_BP_Simplified <- clusterProfiler::simplify(DE_byMorf_BP, cutoff=0.7, by="p.adjust", select_fun=min)
dotplot(DE_byMorf_BP_Simplified , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01) -- BP -- Simplified")
dotplot(DE_byMorf_MF , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01) -- MF ")
emapplot(DE_byMorf_MF,layout="nicely")+ggtitle("Significant GO terms enrich in DE genes (p<0.01) -- MF ")
dotplot(DE_byMorf_CC , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01) -- CC ")
table(DEG[which(DEG$padj<0.05),]$log2FoldChange>0)
##
## FALSE TRUE
## 844 796
GENESList_p005<-list(
DEA_genes_Up_FM=DEG[which(DEG$padj<0.05 & DEG$log2FoldChange>0),"transcript_id"],
DEA_genes_Up_RM=DEG[which(DEG$padj<0.05 & DEG$log2FoldChange<0),"transcript_id"]
)
DE_byMorf_BP_p005<- compareCluster(geneCluster = GENESList_p005,
OrgDb= org.Pcimiciformis.eg.db,
keyType = 'GID',
fun = "enrichGO",
ont = "BP",
pvalueCutoff = 0.05,
pAdjustMethod= "BH",
qvalueCutoff = 0.05,
readable=FALSE)
DE_byMorf_MF_p005<- compareCluster(geneCluster = GENESList_p005,
OrgDb= org.Pcimiciformis.eg.db,
keyType = 'GID',
fun = "enrichGO",
ont = "MF",
pvalueCutoff = 0.05,
pAdjustMethod= "BH",
qvalueCutoff = 0.05,
readable=FALSE)
DE_byMorf_CC_p005<- compareCluster(geneCluster = GENESList_p005,
OrgDb= org.Pcimiciformis.eg.db,
keyType = 'GID',
fun = "enrichGO",
ont = "CC",
pvalueCutoff = 0.05,
pAdjustMethod= "BH",
qvalueCutoff = 0.05,
readable=FALSE)
dotplot(DE_byMorf_BP_p005 , showCategory=100, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.05) -- BP ")
emapplot(DE_byMorf_BP_p005,layout="nicely")+ggtitle("Significant GO terms enrich in DE genes (p<0.05) -- BP ")
#The simplify method apply ‘select_fun’ (which can be a user defined function) to feature ‘by’ to select one representative terms from redundant terms (which have similarity higher than ‘cutoff').
DE_byMorf_BP_p005_Simplified <- clusterProfiler::simplify(DE_byMorf_BP_p005, cutoff=0.7, by="p.adjust", select_fun=min)
dotplot(DE_byMorf_BP_p005_Simplified , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.05) -- BP -- Simplified")
dotplot(DE_byMorf_MF_p005 , showCategory=100, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.05) -- MF ")
emapplot(DE_byMorf_MF_p005,layout="nicely")+ggtitle("Significant GO terms enrich in DE genes (p<0.05) -- MF ")
GO0042302<-DE_byMorf_MF_p005@compareClusterResult[which(DE_byMorf_MF_p005@compareClusterResult$Description=="structural constituent of cuticle"),]
GO0042302
GO0042302_list<-list(
Up_FM=unlist(strsplit(GO0042302[which(GO0042302$Cluster=="DEA_genes_Up_FM"),"geneID"], "/")),
Up_RM=unlist(strsplit(GO0042302[which(GO0042302$Cluster=="DEA_genes_Up_RM"),"geneID"], "/"))
)
#
# VennDiagram::venn.diagram(GO0042302_list,
# category.names =names(GO0042302_list),
# output=TRUE,
# filename = "Venn_GO0042302.png" ,
# imagetype="png",
# # Set names
# cat.cex = 0.6,
# fill = viridis(2))
GO0042302_list
## $Up_FM
## [1] "TRINITY_DN24_c2_g2_i12" "TRINITY_DN1765_c0_g1_i17"
## [3] "TRINITY_DN174_c0_g1_i3" "TRINITY_DN621_c2_g1_i2"
## [5] "TRINITY_DN6368_c0_g1_i7" "TRINITY_DN174_c0_g1_i1"
## [7] "TRINITY_DN1765_c0_g1_i11" "TRINITY_DN24_c2_g2_i7"
## [9] "TRINITY_DN351_c0_g1_i36" "TRINITY_DN411_c3_g1_i19"
## [11] "TRINITY_DN11230_c0_g2_i1" "TRINITY_DN3670_c0_g1_i2"
## [13] "TRINITY_DN1765_c0_g1_i27" "TRINITY_DN1765_c0_g3_i1"
## [15] "TRINITY_DN24_c2_g5_i1" "TRINITY_DN411_c3_g1_i18"
## [17] "TRINITY_DN9298_c0_g1_i1" "TRINITY_DN6368_c1_g1_i7"
## [19] "TRINITY_DN411_c3_g1_i21" "TRINITY_DN1765_c0_g1_i31"
## [21] "TRINITY_DN3670_c0_g1_i1" "TRINITY_DN29616_c0_g1_i1"
## [23] "TRINITY_DN45166_c0_g1_i3"
##
## $Up_RM
## NULL
venn
dotplot(DE_byMorf_CC_p005 , showCategory=100, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.05) -- CC ")
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.10
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] org.Pcimiciformis.eg.db_0.1 AnnotationForge_1.30.1
## [3] AnnotationDbi_1.50.1 IRanges_2.22.2
## [5] S4Vectors_0.26.1 Biobase_2.48.0
## [7] BiocGenerics_0.34.0 stringr_1.4.0
## [9] enrichplot_1.8.1 clusterProfiler_3.16.0
## [11] ComplexHeatmap_2.5.3 RColorBrewer_1.1-2
## [13] forcats_0.5.0 ggrepel_0.8.2
## [15] ggplot2_3.3.2 viridis_0.5.1
## [17] viridisLite_0.3.0 plyr_1.8.6
## [19] tidyr_1.1.0 dplyr_1.0.2
##
## loaded via a namespace (and not attached):
## [1] fgsea_1.14.0 colorspace_1.4-1 rjson_0.2.20
## [4] ellipsis_0.3.1 ggridges_0.5.2 circlize_0.4.10
## [7] qvalue_2.20.0 futile.logger_1.4.3 GlobalOptions_0.1.2
## [10] clue_0.3-57 farver_2.0.3 urltools_1.7.3
## [13] graphlayouts_0.7.0 bit64_0.9-7.1 scatterpie_0.1.4
## [16] xml2_1.3.2 codetools_0.2-17 splines_4.0.3
## [19] GOSemSim_2.14.0 knitr_1.29 polyclip_1.10-0
## [22] jsonlite_1.7.0 cluster_2.1.0 GO.db_3.11.4
## [25] png_0.1-7 ggforce_0.3.2 BiocManager_1.30.10
## [28] compiler_4.0.3 httr_1.4.2 rvcheck_0.1.8
## [31] Matrix_1.2-18 formatR_1.7 tweenr_1.0.1
## [34] htmltools_0.5.0 prettyunits_1.1.1 tools_4.0.3
## [37] igraph_1.2.5 gtable_0.3.0 glue_1.4.2
## [40] reshape2_1.4.4 DO.db_2.9 fastmatch_1.1-0
## [43] Rcpp_1.0.5 vctrs_0.3.4 ggraph_2.0.3
## [46] xfun_0.15 lifecycle_0.2.0 XML_3.99-0.5
## [49] DOSE_3.14.0 europepmc_0.4 MASS_7.3-53
## [52] scales_1.1.1 tidygraph_1.2.0 hms_0.5.3
## [55] lambda.r_1.2.4 yaml_2.2.1 memoise_1.1.0
## [58] gridExtra_2.3 downloader_0.4 triebeard_0.3.0
## [61] stringi_1.4.6 RSQLite_2.2.0 BiocParallel_1.22.0
## [64] shape_1.4.4 rlang_0.4.7 pkgconfig_2.0.3
## [67] bitops_1.0-6 evaluate_0.14 lattice_0.20-41
## [70] purrr_0.3.4 labeling_0.3 cowplot_1.0.0
## [73] bit_1.1-15.2 tidyselect_1.1.0 magrittr_1.5
## [76] R6_2.4.1 generics_0.0.2 DBI_1.1.0
## [79] pillar_1.4.6 withr_2.2.0 RCurl_1.98-1.2
## [82] tibble_3.0.3 crayon_1.3.4 futile.options_1.0.1
## [85] rmarkdown_2.3 GetoptLong_1.0.2 progress_1.2.2
## [88] data.table_1.13.0 blob_1.2.1 digest_0.6.25
## [91] VennDiagram_1.6.20 gridGraphics_0.5-0 munsell_0.5.0
## [94] ggplotify_0.0.5